# Directory structure
github_dir <- file.path("G:/Shared drives/Nord Lab - Computational Projects/MIA/Canales_eLIFE_2021_WGCNA_GO")

# This directory is available in the repository dedicated to differential expression
extsub_dir <- file.path("G:/Shared drives/Nord Lab - Computational Projects/MIA/Canales_et_al_eLIFE_DE_repo/Canales_eLIFE_2021_DE/Extra_submission files")


setwd(github_dir)


# Global R markdown code chunk options
knitr::opts_chunk$set(message=FALSE, 
                      warning = FALSE, 
                      error=FALSE, 
                      echo=TRUE, 
                      cache=FALSE, 
                      fig.width = 7, fig.height = 6, 
                      fig.align = 'left')

# R packages
library(sva)
library(RUVnormalize)
library(pheatmap)
library(edgeR)
library(GenomicFeatures)
library(mclust)
library(parallel)
library(ggplot2)
library(reshape2)
library(dplyr)
library(data.table)
library(DT)
library(RColorBrewer)
library(knitr)
library(ggrepel)
library(gridExtra)
library(grid)
library(plyr)
library(plotly)
library(Hmisc)
library(WGCNA)
library(knitr)
# Loads environment objects from the DE analysis.
#load("G:/Shared drives/Nord Lab - Computational #Projects/MIA/Canales_et_al_eLIFE_DE_repo/Canales_eLIFE_2021_DE/Canales_2021_eLIFE_DE_workspace.RData")

1. WGCNA

WGCNA approach run on a full rpkm data set consisting of 24015 genes. Genes with low expression are excluded from the analysis after the network construction, limiting the gene set to 17195.

# Note: Some of the comments were copied/pasted from WGCNA documentation.

# Reads Gene counts, 24015 genes x 74 samples
exp.original <- read.csv(paste0(extsub_dir,"/", "Gene_counts.csv"))
rownames(exp.original) <- exp.original$X
exp.original$X <- NULL

# Reading metadata
sample.info <- read.csv(paste0(extsub_dir,"/", "metadata.csv"))

options(stringsAsFactors = FALSE)
#allowWGCNAThreads()  # Multi-threading within compiled code is not available on Windows 

# Gene lengths calculation
load(paste0(extsub_dir,"/","exonic.gene.sizes"))

gene.lengths <- as.numeric(lapply(1:nrow(exp.original), function(x) FUN= as.numeric(exonic.gene.sizes[rownames(exp.original)[x]])))

# Calculating differences in rpkm values
rpkm.data <- rpkm(exp.original, gene.length=gene.lengths, log=T, prior.count=0.25)

# Note: There is a minor discrepancy between the rpkm values calculated above, and the values calculated and used during the development phase of this WGCNA analysis. On average, gene rpkm values differ by 0.0004538474, with a range of 1.312417e-09 to 1.632966e-03. These are biologically negligible differences, but they interfere with the reproducibility of some of the plots presented in the manuscript. In order to maintain reproducibility the object rpkm.data_old.RData is loaded from the harddrive. Column names used during the development phase of the project are translated to the current format using the dict data frame. Update from R3.x.x to R 4.0.2, or an update of the edgeR::rpkm() function, resulting in a change in how floating point numbers are handled, is the most likely reason of issue.

load("rpkm.data_old.RData")

dict <- read.csv("sample_dict.csv")
dict$X <- NULL

# This sapply function calculates differences between two rpkm() results.
# It's commented out as a not critical part of this report 
#rpkm_new_old_diffs <- sapply(1:24015, function(x){

#df1 <- as.data.frame(reshape2::melt(rpkm.data[x,]))
#df1 <- data.frame("sample_new" = rownames(df1), "value" = df1$value)

#df2 <- reshape2::melt(rpkm.data_old[x,])
#df2 <- data.frame("sample_old" = paste0("X", rownames(df2)), "value" = df2$value)
#df2 <- merge(df2, dict, by.x = "sample_old", by.y = "sample_old")[,c(3,2)]

#diff_df <- merge(df1, df2, by.x = "sample_new", by.y = "sample_new")
#diff_df$value.x - diff_df$value.y

#})

#mean(rpkm_new_old_diffs)
#range(rpkm_new_old_diffs)

dict <- read.csv("sample_dict.csv")
dict$X <- NULL
dict$sample_old <- gsub('^\\X|\\.$', '', dict$sample_old)

#colnames(rpkm.data_old) 
#all(colnames(rpkm.data_old) %in% dict$sample_old)  # TRUE

# Renaming columns in the old rpkm data
colnames(rpkm.data_old) <- plyr::mapvalues(colnames(rpkm.data_old), from = dict$sample_old, to = dict$sample_new)

# Checking column order
# data.frame("meta" = sample.info$Sample_ID, colnames(rpkm.data_old))

# Reordering columns to match them with the order in sample.info
col_order <- sample.info$Sample_ID
rpkm.data_old <- rpkm.data_old[, col_order]

# Column order matches
#data.frame("meta" = sample.info$Sample_ID, colnames(rpkm.data_old))

metData <- sample.info
rnaData <- rpkm.data_old
raw_counts_Data <- exp.original  #I'm running the analysis using all 24015 genes.  

sampleNames <- names(data.frame(rnaData))

# CHECK SAMPLES
#print(dim(rnaData)) # [1] 24015 genes, 74 samples
#print(dim(metData)) # [1] 74 samples, 15 characteristics

datExpr <- data.frame(rnaData) 
#table(dimnames(t(datExpr))[[1]]==metData$Sample_ID) # TRUE 74


# WGCNA ROUND 1
## Full Dataset
dir.create("./round1-signed/data", recursive = T, showWarnings = FALSE)
dir.create("./round1-signed/figures", recursive = T, showWarnings = FALSE)
dir.create("./round1-signed/tables", recursive = T, showWarnings = FALSE)


# DETERMINE IF ANY GENE VALUES ARE MISSING AND REMOVES THOSE THAT ARE
gsg = goodSamplesGenes(datExpr, verbose = 0); # make sure all genes are OK (no missing or 0 variance)
#gsg$allOK; # [1] TRUE if all genes are fine 
if (!gsg$allOK) {
  # print the gene and sample names that were removed
  if (sum(!gsg$goodGenes)>0) 
    printFlush(paste("Removing genes:", paste(names(datExpr)[!gsg$goodGenes], collapse = ", ")));
  if (sum(!gsg$goodSamples)>0) 
    printFlush(paste("Removing samples:", paste(rownames(datExpr)[!gsg$goodSamples], collapse = ", ")));
  # remove the offending genes and samples from the data
  datExpr = datExpr[gsg$goodSamples, gsg$goodGenes];
}

1.1 Outlier detection

sampleTree = hclust(dist(t(datExpr)), method = "average");

# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.

#sizeGrWindow(12,9)
#pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, 
     cex.axis = 1.5, cex.main = 2)
No outlier samples have been detected.

No outlier samples have been detected.

# Trim the samples if necessary using the cutreeStatic (in the beginning of the tutorial).
# Trimming was not necessary for this dataset 

1.2 Sample dendrogram and trait heatmap

 # compare samples to traits
 sampleTree2 = hclust(dist(t(datExpr)), method = "average")
 metData2 = metData[,c("Sample_ID", "DPC", "Condition", "Response", "sex.by.rna", "Lane", "Litter")]
 rownames(metData2) = metData2$Sample_ID
 metData2$Sample_ID = NULL
 
 # Re-cluster samples
 sampleTree2 = hclust(dist(t(datExpr)), method = "average")
 
 # Convert traits to a color representation: white means low, red means high, grey means missing entry
 m_col <- data.frame("Condition" = as.numeric(ifelse(metData2$Condition=="Saline", 0,1)),
                     "Sex" = as.numeric(ifelse(metData2$sex.by.rna=="M", 1,0)))
                     
 traitColors = numbers2colors(m_col, signed = T)
 colnames(traitColors) <- c("Condition", "Sex")
 
 par(cex = 0.6);
 par(mar = c(0,4,2,0))
 # Plot the sample dendrogram and the colors underneath.
 plotDendroAndColors(sampleTree2, traitColors,
                      groupLabels = metData2$Sample_ID,
                      main = "Sample dendrogram and trait heatmap")
Sample dendrogram and trait heatmap. Red color indicates polyI:C treatment or males.

Sample dendrogram and trait heatmap. Red color indicates polyI:C treatment or males.

1.3 Estimation of the soft-threshold power

# Choose a set of soft-thresholding powers
 powers = c(c(1:10), seq(from = 12, to=20, by=2))

# Call the network topology analysis function
# It's conditionally loaded from the file to increase efficiency of writing this report
 if(file.exists("sft.RData")){
  load("sft.RData")
  } else {
  sft = pickSoftThreshold(t(datExpr), powerVector = powers, verbose = 5)
  }  

 # Plot the results:
 #sizeGrWindow(9, 5)
 #par(mfrow = c(1,2));
 cex1 = 0.9;
 
 # Scale-free topology fit index as a function of the soft-thresholding power
 plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
      xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
      main = paste("Scale independence"));
      text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
      labels=powers,cex=cex1,col="red");
 # this line corresponds to using an R^2 cut-off of h
 abline(h=0.90,col="red")
 
 
 # Mean connectivity as a function of the soft-thresholding power
 plot(sft$fitIndices[,1], sft$fitIndices[,5],
      xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
      main = paste("Mean connectivity"))
      text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
We choose the soft power 3, which is the lowest power at which the scale-free topology reaches high R^2We choose the soft power 3, which is the lowest power at which the scale-free topology reaches high R^2

We choose the soft power 3, which is the lowest power at which the scale-free topology reaches high R^2

1.4 Network construction and module detection

# Automatic network construction and module detection

# Analyzing a dataset with max BlockSize=25000 on a laptop with 16 GB of RAM works just fine, 
# but takes a substantial amount of time. ~90 min on Intel(R) Core(TM) i7-5600U CPU.
# From the documentation: "A 16GB workstation should handle up to 20000 probes; a 32GB workstation should handle perhaps 30000."

# I saved the net4.RData object to speed up knitting this report, and decrease carbon footprint. 

if(file.exists("net4.RData")){
  
  load("net4.RData")
  
  } else {
  
  # Automatic network construction and module detection
  tmpMulti = as.data.frame(t(datExpr))
      
  net = blockwiseModules(datExpr=tmpMulti, checkMissingData=TRUE, minModuleSize=10,
                        maxBlockSize=25000,  corType="bicor", power = 3,
                        networkType="signed", TOMType = "signed", saveTOMs=TRUE,
                        saveTOMFileBase=paste("./round1-signed/data/signed-round1"),
                        nThreads=4, verbose=Inf, mergeCutHeight = 0.15, 
                        pamStage=FALSE, reassignThreshold=1e-10, deepSplit=2)

  #save(net, file = "net4.RData")
  
}
# Module-trait association testing - full 24015 gene set

 #open a graphics window
 #sizeGrWindow(12, 9)
 
# Convert labels to colors for plotting
 mergedColors = net$colors
 # Plot the dendrogram and the module colors underneath
 plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                     "Module colors", 
                     dendroLabels = FALSE, hang = 0.03,
                     addGuide = TRUE, guideHang = 0.05)
Fig. 2a. WGCNA cluster dendrogram of time series samples identifies genes that were assigned into co-expression modules. Based on similarities in module gene expression, six of the original 10 modules were grouped into two larger modules, BrRePi: Brown, Red and Pink; YeMaBl: Yellow, Magenta and Black.

Fig. 2a. WGCNA cluster dendrogram of time series samples identifies genes that were assigned into co-expression modules. Based on similarities in module gene expression, six of the original 10 modules were grouped into two larger modules, BrRePi: Brown, Red and Pink; YeMaBl: Yellow, Magenta and Black.

 moduleLabels = net$colors
 moduleColors = net$colors
 MEs = net$MEs;
 geneTree = net$dendrograms[[1]]


datTraits <- data.frame(DPC=metData$DPC, 
                        Condition=ifelse(metData$Condition=="PolyIC", 1, 0),
                        Sex=ifelse(metData$sex.by.rna == "M", 1, 0))
 
 # Define numbers of genes and samples
 nGenes = ncol(t(datExpr));
 nSamples = nrow(t(datExpr));
 # Recalculate MEs with color labels
 
 MEs0 = moduleEigengenes(t(datExpr), moduleColors)$eigengenes
 
 
 MEs = orderMEs(MEs0)
 moduleTraitCor = cor(MEs, datTraits, use = "p");
 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
 
 #knitr::kable(moduleTraitPvalue)
 
 #reading the table. We color code each association by the correlation value:

 #sizeGrWindow(10,6)
 # Will display correlations and their p-values
 textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
                    signif(moduleTraitPvalue, 1), ")", sep = "");
 dim(textMatrix) = dim(moduleTraitCor)
 par(mar = c(6, 8.5, 3, 3));
 # Display the correlation values within a heatmap plot
 labeledHeatmap(Matrix = moduleTraitCor,
                xLabels = names(datTraits),
                yLabels = names(MEs),
                ySymbols = names(MEs),
                colorLabels = FALSE,
                colors = colorRampPalette(c("green", "white", "red"))(1000),
                textMatrix = textMatrix,
                setStdMargins = FALSE,
                cex.text = 1,
                zlim = c(-1,1),
                main = paste("Module-trait relationships"))
Module-trait association testing. Full 24015 gene set. Numerical values represent signed Pearson’s correlation coefficients, with Student asymptotic p values in brackets. Green represents negative and red represents positive correlation. Color intensity signifies the strength of the correlation.

Module-trait association testing. Full 24015 gene set. Numerical values represent signed Pearson’s correlation coefficients, with Student asymptotic p values in brackets. Green represents negative and red represents positive correlation. Color intensity signifies the strength of the correlation.

# We quantify associations of individual genes with our trait of interest  by defining Gene Significance GS as (the absolute value of) the correlation between the gene and the trait. For each module, we also define a quantitative measure of Module Membership (MM) as the correlation of the module eigengene and the gene expression profile. This allows us to quantify the similarity of all genes on the array to every module.
# Define variable weight containing the weight column of datTrait
 
 modNames = substring(names(MEs), 3)
 geneModuleMembership = as.data.frame(cor(t(datExpr), MEs, use = "p"));
 MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
 
 names(geneModuleMembership) = paste("MM", modNames, sep="");
 names(MMPvalue) = paste("p.MM", modNames, sep="");
 geneTraitSignificance = as.data.frame(cor(t(datExpr), datTraits, use = "p"));
 
 GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
 names(geneTraitSignificance) = paste("GS.", names(datTraits), sep="");
 names(GSPvalue) = paste("p.GS.", names(datTraits), sep="")
 
 #head(GSPvalue)
 #moduleTraitPvalue
 # merging geneModuleMembership with GSPvalue - gene significance value for the association with a trait. 
 geneModuleMembership$gene_name <-rownames(geneModuleMembership) 
 GSPvalue$gene_name <- rownames(GSPvalue)
 merged_MM_and_GS <- merge(geneModuleMembership, GSPvalue, by="gene_name")
 
 geneTraitSignificance$gene_name <- rownames(geneTraitSignificance)
 rownames(geneTraitSignificance) <- NULL
 merged_MM_and_GS <- merge(merged_MM_and_GS, geneTraitSignificance, by="gene_name")
 
 colors_df <- data.frame(gene_name=rownames(datExpr), moduleColors=moduleColors) # I think the order is correct, but have to verify
 merged_MM_and_GS <- merge(merged_MM_and_GS, colors_df, by="gene_name")
 
 MMPvalue$gene_name <- rownames(MMPvalue)
 rownames(MMPvalue) <- NULL
 merged_MM_and_GS <- merge(merged_MM_and_GS, MMPvalue, by="gene_name")

1.5 Merged and filtered modules

# open a graphics window
 #sizeGrWindow(12, 9)
 # Convert labels to colors for plotting
 #mergedColors = net$colors
 # Plot the dendrogram and the module colors underneath
 #plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
 #                     "Module colors", 
 #                     dendroLabels = FALSE, hang = 0.03,
 #                    addGuide = TRUE, guideHang = 0.05)
 
 moduleLabels = net$colors
 moduleColors = net$colors
 
#I'm editing this script to merge modules containing genes with similar expression trajectory and  function.
 
 #Brown + Red + Pink = BrRePi, Yellow + Magenta + Black = YeMaBl
 moduleColors_merged= ifelse(moduleColors=="brown", "BrRePi", moduleColors)
 moduleColors_merged= ifelse(moduleColors=="red", "BrRePi", moduleColors_merged)
 moduleColors_merged= ifelse(moduleColors=="pink", "BrRePi", moduleColors_merged)
 
moduleColors_merged= ifelse(moduleColors=="yellow", "YeMaBl", moduleColors_merged)
moduleColors_merged= ifelse(moduleColors=="magenta", "YeMaBl", moduleColors_merged)
moduleColors_merged= ifelse(moduleColors=="black", "YeMaBl", moduleColors_merged)

moduleLabels= moduleColors_merged
moduleColors = moduleColors_merged


 MEs = net$MEs;
 geneTree = net$dendrograms[[1]];
 #save(MEs, moduleLabels, moduleColors, geneTree,
 #      file = "MIA_Karol_preliminary_-networkConstruction-auto.RData")
#I'm filtering the same 17195 genes as used in the DE analysis

datExpr <- rnaData #rnaData contains log2 rpkm values of 74 samples and 24015 genes used for DE expression analysis  
datExpr <- as.data.frame(datExpr)
datExpr$moduleColors_merged <- moduleColors_merged
datExpr$moduleColors <- net$colors

datExpr$gene_name <- rownames(datExpr)

original_exp.data <- read.csv(paste0(extsub_dir,"/", "Count_gene_set_from_initial_submission.csv"))

y <- filter(datExpr, gene_name %in% original_exp.data$gene_name)

moduleColors_merged <- y$moduleColors_merged
moduleColors <- y$moduleColors

rownames(y) <- y$gene_name
y$gene_name <- NULL
y$moduleColors_merged <- NULL
y$moduleColors <- NULL

datExpr <- y
datTraits <- data.frame(DPC=metData$DPC,
                        Condition=ifelse(metData$Condition=="PolyIC", 1, 0), 
                        Sex=ifelse(metData$sex.by.rna == "M", 1, 0))
 
 # Define numbers of genes and samples
 nGenes = ncol(t(datExpr));
 nSamples = nrow(t(datExpr));
 # Recalculate MEs with color labels
 MEs0 = moduleEigengenes(t(datExpr), moduleColors_merged)$eigengenes
 MEs = orderMEs(MEs0)
 moduleTraitCor = cor(MEs, datTraits, use = "p");
 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
 
 #knitr::kable(moduleTraitPvalue)
 
 
 #reading the table. We color code each association by the correlation value:

 #sizeGrWindow(10,6)
 # Will display correlations and their p-values
 textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
                    signif(moduleTraitPvalue, 1), ")", sep = "");
 dim(textMatrix) = dim(moduleTraitCor)
 par(mar = c(6, 8.5, 3, 3));
 # Display the correlation values within a heatmap plot
 labeledHeatmap(Matrix = moduleTraitCor,
                xLabels = names(datTraits),
                yLabels = names(MEs),
                ySymbols = names(MEs),
                colorLabels = FALSE,
                colors = colorRampPalette(c("green", "white", "red"))(1000),
                textMatrix = textMatrix,
                setStdMargins = FALSE,
                cex.text = 1,
                zlim = c(-1,1),
                main = paste("Module-trait relationships"))
Fig. 2b Heatmap of correlation between gene expression modules and experimental traits; age, condition (saline vs poly(I:C)), and sex. Filtered 17195 gene set. Blue and Turquoise modules are strongly associated with age; Green and Grey modules are significantly associated with MIA condition. Numerical values represent signed Pearson’s correlation coefficients, with Student asymptotic p values in brackets. Green represents negative and red represents positive correlation. Color intensity signifies the strength of the correlation.

Fig. 2b Heatmap of correlation between gene expression modules and experimental traits; age, condition (saline vs poly(I:C)), and sex. Filtered 17195 gene set. Blue and Turquoise modules are strongly associated with age; Green and Grey modules are significantly associated with MIA condition. Numerical values represent signed Pearson’s correlation coefficients, with Student asymptotic p values in brackets. Green represents negative and red represents positive correlation. Color intensity signifies the strength of the correlation.

1.6 Module trajectories

1.6.1 Pre-merged 10 modules

#A proper box plot
#####
#With 10 modules for the supplement

ME_average_exp = moduleEigengenes(t(datExpr), moduleColors)$eigengenes

ME_df <- data.frame(ME_average_exp, "Condition" = metData$Condition, "DPC" = metData$DPC, "ExperimentalDesign" = metData$ExperimentalDesign)


## Eigengenes
Eigengene_module_expression <- function(x){
df <- dplyr::select(ME_df, paste0("ME", x), "Condition", "DPC", "ExperimentalDesign")

j_brew_colors <- brewer.pal(n = 8, name = "Paired")[c(6,2)]

ggplot(df, mapping = aes(x = DPC, y=get(colnames(df)[1]),  colour=Condition))+
  geom_point(size=2, aes(group=ExperimentalDesign), position=position_dodge(width=0.3))+
  geom_boxplot(alpha=0, aes(group=ExperimentalDesign), position="identity")+
  theme_bw()+
  geom_smooth(method = "loess", se=T, aes(fill=Condition,  group=Condition), size  = 1, alpha=0.1)+
  theme(axis.text.x=element_text(angle=50, vjust=0.9, hjust=1, size=12, face="bold"))+
  labs(title= paste(capitalize(x), "module"), x="", y="Eigengene expression")+
  theme(plot.title = element_text(size = rel(2), hjust=0.5))+
  scale_color_manual(values = j_brew_colors)+
  scale_fill_manual(values = j_brew_colors)+
  scale_x_continuous(breaks =c(12.5, 14.5, 17.5, 19.5), labels = c("E12.5", "E14.5", "E17.5", "P0"))+
  coord_cartesian(ylim=c(-0.3, 0.4))+
   theme(#legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
}


pl <- lapply(c("blue", "turquoise", "brown", "red", "pink", "yellow", "magenta", "black", "green",  "grey"), function(x)
        Eigengene_module_expression(x))


ml <- marrangeGrob(pl, nrow=2, ncol=5, top = "", layout_matrix = rbind(c(1:5), c(6:10)))
ml

1.6.2 Merged 6 modules

##### 6 module trajectories ######
ME_average_exp = moduleEigengenes(t(datExpr), moduleColors_merged)$eigengenes

ME_df <- data.frame(ME_average_exp, "Condition" = metData$Condition, "DPC" = metData$DPC, "ExperimentalDesign" = metData$ExperimentalDesign)

## Eigengenes
Eigengene_module_expression <- function(x){
df <- dplyr::select(ME_df, paste0("ME", x), "Condition", "DPC", "ExperimentalDesign")

j_brew_colors <- brewer.pal(n = 8, name = "Paired")[c(6,2)]

ggplot(df, mapping = aes(x = DPC, y=get(colnames(df)[1]),  colour=Condition))+
  geom_point(size=2, aes(group=ExperimentalDesign), position=position_dodge(width=0.3))+
  geom_boxplot(alpha=0, aes(group=ExperimentalDesign), position="identity")+
  theme_bw()+
  geom_smooth(method = "loess", se=T, aes(fill=Condition,  group=Condition), size  = 1, alpha=0)+
  theme(axis.text.x=element_text(angle=50, vjust=0.9, hjust=1, size=12, face="bold"))+
  labs(title= paste(capitalize(x), "module"), x="", y="Eigengene expression")+
  theme(plot.title = element_text(size = rel(2), hjust=0.5))+
  scale_color_manual(values = j_brew_colors)+
  scale_fill_manual(values = j_brew_colors)+
   coord_cartesian(ylim=c(-0.3, 0.4))+
  scale_x_continuous(breaks =c(12.5, 14.5, 17.5, 19.5), labels = c("E12.5", "E14.5", "E17.5", "P0"))+
  theme(legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
}


pl <- lapply(c("blue", "turquoise", "BrRePi", "YeMaBl", "green",  "grey"), function(x)
        Eigengene_module_expression(x))


ml <- marrangeGrob(pl, nrow=2, ncol=3, top = "", layout_matrix = rbind(c(1:3), c(4:6)))
ml
Fig. 2c Module eigengene expression for MIA (red) and control (blue) groups plotted by time-point illustrates expression trajectories across developmental stages, capturing module- and stage-specific differences between MIA and control groups.

Fig. 2c Module eigengene expression for MIA (red) and control (blue) groups plotted by time-point illustrates expression trajectories across developmental stages, capturing module- and stage-specific differences between MIA and control groups.

# 
#save.image("G:/Shared drives/Nord Lab - Computational #Projects/MIA/Canales_eLIFE_2021_WGCNA_GO/Workspace_3_12_2021.RData")

#load("G:/Shared drives/Nord Lab - Computational Projects/MIA/Canales_eLIFE_2021_WGCNA_GO/Workspace_3_12_2021.RData")

1.7 Green and Grey module gene expression

# NOTE: The green color marking was only used to aid with identifying points for precise labeling in the Illustrator 

#Filters the original merged_MM_and_GS object. Needed for counting genes in each module.
merged_MM_and_GS_filtered <- filter(merged_MM_and_GS, gene_name %in% rownames(datExpr))
merged_MM_and_GS_filtered$moduleColors_merged <- moduleColors_merged # Yes, I checked the order.

# Reading DE supplementary files
# [,2:7] skips the already present module color assignment columns
E12.5_GLM <- read.csv("Supplementary Table 1 , E12.5_DE_saline_vs_polyIC.csv")[,2:7]
E14.5_GLM <- read.csv("Supplementary Table 2, E14.5_DE_saline_vs_polyIC.csv")[,2:7]
E17.5_GLM <- read.csv("Supplementary Table 3, E17.5_DE_saline_vs_polyIC.csv")[,2:7]
E19.5_GLM <- read.csv("Supplementary Table 4, P0_DE_saline_vs_polyIC.csv")[,2:7]

E12.5_GLM_module <- merge(E12.5_GLM, merged_MM_and_GS_filtered[,c(1,18, 29)], by="gene_name")
E14.5_GLM_module <- merge(E14.5_GLM, merged_MM_and_GS_filtered[,c(1,18, 29)], by="gene_name")
E17.5_GLM_module <- merge(E17.5_GLM, merged_MM_and_GS_filtered[,c(1,18, 29)], by="gene_name")
E19.5_GLM_module <- merge(E19.5_GLM, merged_MM_and_GS_filtered[,c(1,18, 29)], by="gene_name")


E12.5_GLM_module$DPC <- rep("E12.5", nrow(E12.5_GLM_module))
E14.5_GLM_module$DPC <- rep("E14.5", nrow(E14.5_GLM_module))
E17.5_GLM_module$DPC <- rep("E17.5", nrow(E17.5_GLM_module))
E19.5_GLM_module$DPC <- rep("E19.5", nrow(E19.5_GLM_module))


all_GLM_module <- rbind(E12.5_GLM_module, E14.5_GLM_module, E17.5_GLM_module, E19.5_GLM_module)

all_GLM_module$Significant <- ifelse(all_GLM_module$PValue < 0.05, "P < 0.05", "P > 0.05")
all_GLM_module$Significant <- ifelse(all_GLM_module$FDR < 0.05, "FDR < 0.05", all_GLM_module$Significant)


all_GLM_module$moduleColors_merged <- factor(all_GLM_module$moduleColors_merged, levels = c("blue", "turquoise", "BrRePi", "YeMaBl", "grey", "green"))

all_GLM_module <- arrange(all_GLM_module, moduleColors_merged)

all_GLM_module$DPC_module <- paste0(all_GLM_module$DPC, "_", all_GLM_module$moduleColors_merged)

all_GLM_module$moduleColors_merged <- factor(all_GLM_module$moduleColors_merged, levels = c("blue", "turquoise", "BrRePi", "YeMaBl", "green", "grey"))

all_GLM_module <- arrange(all_GLM_module, moduleColors_merged)


#
all_GLM_module <- filter(all_GLM_module, logCPM > 0, DPC %in% c("E12.5", "E14.5"))
all_GLM_module_filtered <- filter(all_GLM_module, moduleColors_merged %in% c("green", "grey"))

#Fig 3a Mol Psych
sel_gene_list <- c("Vegfa", "Flt1", "Adm", "Il20rb", "Bnip3", "Pdk1")

pos <- position_jitter(seed = 1234)
df_mod <- all_GLM_module_filtered
df_mod$logFC <- ifelse(df_mod$logFC > 4, 4, df_mod$logFC)
df_mod$logFC <- ifelse(df_mod$logFC < -2, -2, df_mod$logFC)


p <- ggplot(df_mod, 
              aes(x=moduleColors_merged, y=logFC, label=gene_name, color = Significant, fill = Significant))+
  geom_jitter(data=filter(df_mod, Significant == "P > 0.05"), 
              aes(fill = Significant), pch=21, alpha=0.1, position = pos)+
  geom_jitter(data=filter(df_mod, Significant == "P < 0.05"), 
              aes(fill = Significant), pch=21, alpha=0.3, position = pos)+
  geom_jitter(data=filter(df_mod, Significant == "FDR < 0.05"), 
              aes(fill = Significant), pch=21, alpha=0.7, position = pos)+
  
  geom_point(data=filter(df_mod, logFC > 0, PValue < 0.05, gene_name %in% sel_gene_list), 
              aes(x=moduleColors_merged, y=logFC, fill = "green"), 
                  pch=21, size=2, alpha=1, position = pos)+
  
  geom_violin(data=df_mod, 
                aes(x=moduleColors_merged, y=logFC, group=DPC_module), alpha=0.2)+
  geom_text_repel(data=filter(df_mod, logFC > 0, PValue < 0.05, gene_name %in% sel_gene_list),
                  point.padding = 1, position = pos)+
  theme_bw()+
  
  scale_fill_manual(values=c("FDR < 0.05" = "#E41A1C", "P < 0.05"="#eb8889", 
                             "P > 0.05" ="grey", "green"="green"))+
  scale_color_manual(values=c("FDR < 0.05" = "black", "P < 0.05"="#eb8889", 
                              "P > 0.05" ="grey", "green"="green"))+
  
  theme(axis.text.x=element_text(vjust=0.9, hjust=1, size=10))+
  labs(x = "", y = "log2FC")+
  facet_wrap(~DPC, ncol = 2)+
  geom_hline(yintercept = 0)+
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
  
p
Fig. 2a Violin plots visualize distribution of log2 fold changes of Green and Grey module gene expression between control and MIA animals. At E12.5, an initial set of genes in the Green and Grey modules are induced and DE in MIA samples. By E14.5, generalized module expression exhibits induction in the MIA samples, with particularly strong change for the Green module, where nearly all genes are pregulated. Genes with expression trajectories shown in (d) are labeled.

Fig. 2a Violin plots visualize distribution of log2 fold changes of Green and Grey module gene expression between control and MIA animals. At E12.5, an initial set of genes in the Green and Grey modules are induced and DE in MIA samples. By E14.5, generalized module expression exhibits induction in the MIA samples, with particularly strong change for the Green module, where nearly all genes are pregulated. Genes with expression trajectories shown in (d) are labeled.

1.8 Module enrichment among DE genes

#Module gene lists
gene_lists <- lapply(c("blue", "turquoise", "BrRePi", "YeMaBl", "green",  "grey"), function(x)
        filter(merged_MM_and_GS_filtered, moduleColors_merged ==x)$gene_name)

blue_genes <- gene_lists[[1]]
turquoise_genes <- gene_lists[[2]]
BrRePi_genes <-  gene_lists[[3]]
YeMaBl_genes <- gene_lists[[4]]
green_genes <- gene_lists[[5]]
grey_genes <-  gene_lists[[6]] 
# Hypergeometric test split into upregulated and downregulated genes. 

gene_set_enrichment_directional <- function(module_color, DPC){

module <- get(paste0(module_color, "_genes"))  
DPC_dataset <- get(paste0(DPC, "_GLM"))

n_of_genes_in_a_module <- length(module)

n_of_DE_Genes_at_a_DPC <- length(filter(DPC_dataset, PValue < 0.05)$gene_name)

n_of_overlapping_DE_genes <- length(intersect(module, filter(DPC_dataset, PValue < 0.05)$gene_name))

fraction_of_overlapping_DE_genes_with_a_module <- n_of_overlapping_DE_genes/n_of_genes_in_a_module

n_of_upregulated_overlapping_genes <- length(intersect(module, filter(DPC_dataset, PValue < 0.05, logFC > 0)$gene_name))
n_of_downregulated_overlapping_genes <- length(intersect(module, filter(DPC_dataset, PValue < 0.05, logFC < 0)$gene_name))

# Comments on the right refer to the post in the link, not the current dataset
  n <- nrow(DPC_dataset)   #Total gene population 15k
  a <- length(module)  #RNA Seq enriched # 3000
  b_up <- length(filter(DPC_dataset, PValue < 0.05 & logFC > 0)$gene_name) #Enriched by Chip-Seq 400
  t_up <- length(intersect(module, filter(DPC_dataset, PValue < 0.05 & logFC > 0)$gene_name)) #Intersected 100
  b_down <- length(filter(DPC_dataset, PValue < 0.05 & logFC < 0)$gene_name)
  t_down <- length(intersect(module, filter(DPC_dataset, PValue < 0.05 & logFC < 0)$gene_name))

  #https://stats.stackexchange.com/questions/16247/calculating-the-probability-of-gene-list-overlap-between-an-rna-seq-and-a-chip-c
# Tests if the number of overlapping genes with a module is enriched comparing to the overall number of genes DE at a DPC.  
  
hypergeometric_test_p_value_upregulated <- sum(dhyper(t_up:b_up, a, n - a, b_up))

hypergeometric_test_p_value_downregulated <- sum(dhyper(t_down:b_down, a, n - a, b_down))

x <- c(n_of_DE_Genes_at_a_DPC, n_of_genes_in_a_module, n_of_overlapping_DE_genes,  fraction_of_overlapping_DE_genes_with_a_module, hypergeometric_test_p_value_upregulated,hypergeometric_test_p_value_downregulated,
  format(n_of_upregulated_overlapping_genes, scientific=FALSE),     
  format(n_of_downregulated_overlapping_genes, scientific=FALSE))

x <- as.data.frame(x)
colnames(x) <- "values"
x$module_color <- rep(module_color, 8)
x$DPC <- rep(DPC, 8)
x$stat <- c("n_of_DE_Genes_at_a_DPC", "n_of_genes_in_a_module",  "n_of_overlapping_DE_genes",  "fraction_of_overlapping_DE_genes_with_a_module", "hypergeometric_test_p_value_upregulated", "hypergeometric_test_p_value_downregulated", "n_of_upregulated_overlapping_genes", "n_of_downregulated_overlapping_genes")
x <- x[,c(4,3,2,1)]
#x$values <- format(x$values, scientific=FALSE)
x
}


DPCs <- c("E12.5", "E14.5", "E17.5", "E19.5")
colors <- c("blue", "turquoise", "BrRePi", "YeMaBl", "green",  "grey")

geneset_enrichment_list_directional <- lapply(DPCs, function(y) lapply(colors, function(x) FUN= gene_set_enrichment_directional(x,y)))


# Combines the stats for each DPC
E12.5_enrichment_in_modules_dir <- as.data.frame(rbindlist(geneset_enrichment_list_directional[[1]]))
E14.5_enrichment_in_modules_dir <- as.data.frame(rbindlist(geneset_enrichment_list_directional[[2]]))
E17.5_enrichment_in_modules_dir <- as.data.frame(rbindlist(geneset_enrichment_list_directional[[3]]))
E19.5_enrichment_in_modules_dir <- as.data.frame(rbindlist(geneset_enrichment_list_directional[[4]]))



# Extracts enrichment p values 
E12.5_module_enrichment_p_values_dir <- filter(E12.5_enrichment_in_modules_dir, stat %in% c("hypergeometric_test_p_value_upregulated", "hypergeometric_test_p_value_downregulated"))

E14.5_module_enrichment_p_values_dir <- filter(E14.5_enrichment_in_modules_dir, stat %in% c("hypergeometric_test_p_value_upregulated", "hypergeometric_test_p_value_downregulated"))

E17.5_module_enrichment_p_values_dir <- filter(E17.5_enrichment_in_modules_dir, stat %in% c("hypergeometric_test_p_value_upregulated", "hypergeometric_test_p_value_downregulated"))

E19.5_module_enrichment_p_values_dir <- filter(E19.5_enrichment_in_modules_dir, stat %in% c("hypergeometric_test_p_value_upregulated", "hypergeometric_test_p_value_downregulated"))



# dir stands for direction here...
module_enrich_fun_dir <- function(object, title){
#object <- E17.5_module_enrichment_p_values_dir

object$values <- -log10(as.numeric(as.character(object$value))) + 0.00001 # small addition fixes color assignment issues for value=0

object$DE_dir <- rep(c("up", "down"), 6)

object_up <- filter(object, DE_dir=="up")
object_up$module_color <- factor(object_up$module_color, levels=object_up$module_color)
#Converts Inf value to 25. 
object_up$values <- ifelse(object_up$values == "Inf", 300, object_up$values)


object_down <- filter(object, DE_dir=="down")
object_down$module_color <- factor(object_down$module_color, levels=object_down$module_color)  
object_down$values <- object_down$values*-1

# Converts -Inf value to -300. 
object_down$values <- ifelse(object_down$values == "-Inf", -300, object_down$values)

colors <- rep(c("blue", "turquoise", "brown", "yellow", "green", "grey"), each=2)
    
ggplot()+
  geom_bar(data = object_up, aes(x=module_color, y= (as.numeric(as.character(values)))), stat = "identity", fill=c("blue", "turquoise", "brown", "yellow", "green", "grey"), color="black")+
geom_bar(data = object_down, aes(x=module_color, y= (as.numeric(as.character(values)))), stat = "identity", fill=c("blue", "turquoise", "brown", "yellow", "green", "grey"), color="black")+ 
  theme_bw()+
  theme(axis.text.x=element_text(angle=40, vjust=1, hjust=1))+
  labs(x="",y="-log10 (p)", title=title)+
  theme(plot.title=element_text(hjust=0.5))+
  geom_hline(aes(yintercept=-log10(0.05)), colour="#990000", linetype="dashed")+
  geom_hline(aes(yintercept=log10(0.05)), colour="#990000", linetype="dashed")+
  geom_hline(aes(yintercept=0), colour="black", linetype="solid", size=1)
}



E12.5_DE_enrichment_plot_dir <- module_enrich_fun_dir(E12.5_module_enrichment_p_values_dir, "E12.5")+theme(axis.text.x = element_blank())

E14.5_DE_enrichment_plot_dir <- module_enrich_fun_dir(E14.5_module_enrichment_p_values_dir, "E14.5")+theme(axis.text.x = element_blank())

E17.5_DE_enrichment_plot_dir <- module_enrich_fun_dir(E17.5_module_enrichment_p_values_dir, "E17.5")+theme(axis.text.x = element_blank())

E19.5_DE_enrichment_plot_dir <- module_enrich_fun_dir(E19.5_module_enrichment_p_values_dir, "P0")

pl <- list(E12.5_DE_enrichment_plot_dir, E14.5_DE_enrichment_plot_dir, E17.5_DE_enrichment_plot_dir, E19.5_DE_enrichment_plot_dir)

#ml <- marrangeGrob(pl, nrow=2, ncol=2, top=textGrob("", gp=gpar(fontsize=12)), bottom="")
#ml

#E12.5_DE_enrichment_plot_dir
#E14.5_DE_enrichment_plot_dir
#E17.5_DE_enrichment_plot_dir
#E19.5_DE_enrichment_plot_dir


### Saving Hypergeometric test for future reference
hyp_geo_p_val_df <- rbind(E12.5_module_enrichment_p_values_dir, E14.5_module_enrichment_p_values_dir,
      E17.5_module_enrichment_p_values_dir, E19.5_module_enrichment_p_values_dir)

hyp_geo_p_val_df$dir <- rep(c("Up", "Down"), 24)

hyp_geo_p_val_df$stat <- NULL

colnames(hyp_geo_p_val_df) <- c("DPC", "module_color", "hypergeometric_test_p_value", "Direction")

hyp_geo_p_val_df <- hyp_geo_p_val_df[,c(1,2,4,3)]

hyp_geo_p_val_df$hypergeometric_test_p_value <- as.numeric(as.character(hyp_geo_p_val_df$hypergeometric_test_p_value)) 

hyp_geo_p_val_df$Significant <- ifelse(hyp_geo_p_val_df$hypergeometric_test_p_value < 0.05, TRUE, FALSE)

#hyp_geo_p_val_df
### Numbers of genes ####
E12.5_module_enrichment_ns <- filter(E12.5_enrichment_in_modules_dir, stat %in% c("n_of_DE_Genes_at_a_DPC", "n_of_upregulated_overlapping_genes", "n_of_downregulated_overlapping_genes"))


E14.5_module_enrichment_ns <- filter(E14.5_enrichment_in_modules_dir, stat %in% c("n_of_DE_Genes_at_a_DPC", "n_of_upregulated_overlapping_genes", "n_of_downregulated_overlapping_genes"))


E17.5_module_enrichment_ns <- filter(E17.5_enrichment_in_modules_dir, stat %in% c("n_of_DE_Genes_at_a_DPC", "n_of_upregulated_overlapping_genes", "n_of_downregulated_overlapping_genes"))


E19.5_module_enrichment_ns <- filter(E19.5_enrichment_in_modules_dir, stat %in% c("n_of_DE_Genes_at_a_DPC", "n_of_upregulated_overlapping_genes", "n_of_downregulated_overlapping_genes"))


module_enrich_fun_dir_ns <- function(object, title){
#object <- E12.5_module_enrichment_ns
  
object$values <- as.numeric(as.character(object$value)) + 0.001

# threshold is the number of DE genes < 0.05 at a particular DPC divided by 12. There are 6 modules, and we are considering enrichment in 2 directions = 12. 
threshold <- as.numeric(as.character(object[1,4]))/12

object$DE_dir <- rep(c("_total","_up", "_down"), 6)

object$module_color_dir <- paste0(object$module_color, object$DE_dir)

object <- filter(object, stat %in% c("n_of_upregulated_overlapping_genes", "n_of_downregulated_overlapping_genes"))

object$module_color_dir <- factor(object$module_color_dir, levels=object$module_color_dir)
  
colors <- rep(c("blue", "turquoise", "brown", "yellow", "green", "grey"))

object_up <- filter(object, DE_dir=="_up")
object_up$module_color <- factor(object_up$module_color, levels=object_up$module_color)
  
object_down <- filter(object, DE_dir=="_down")
object_down$module_color <- factor(object_down$module_color, levels=object_down$module_color)

ggplot()+
geom_bar(data = object_up, aes(x=module_color, y= as.numeric(as.character(values))), stat = "identity", fill=colors, color="black")+
geom_bar(data = object_down, aes(x=module_color, y= -as.numeric(as.character(values))), stat = "identity", fill=colors, color="black")+ 
  theme_bw()+
  theme(axis.text.x=element_text(angle=40, vjust=1, hjust=1))+
  labs(x="",y="N of P < 0.05 genes", title=title)+
  theme(plot.title=element_text(hjust=0.5))+
  #geom_hline(aes(yintercept=threshold), colour="#990000", linetype="dashed")+
  #geom_hline(aes(yintercept=-threshold), colour="#990000", linetype="dashed")+
  geom_hline(aes(yintercept=0), colour="black", linetype="solid", size=1)+
  theme(legend.key = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text=element_text(colour="black")
        )
}


E12.5_DE_enrichment_plot_ns <- module_enrich_fun_dir_ns(E12.5_module_enrichment_ns, "E12.5")
E14.5_DE_enrichment_plot_ns <- module_enrich_fun_dir_ns(E14.5_module_enrichment_ns, "E14.5")
E17.5_DE_enrichment_plot_ns <- module_enrich_fun_dir_ns(E17.5_module_enrichment_ns, "E17.5")
E19.5_DE_enrichment_plot_ns <- module_enrich_fun_dir_ns(E19.5_module_enrichment_ns, "P0")

pl <- list(E12.5_DE_enrichment_plot_ns, E14.5_DE_enrichment_plot_ns, E17.5_DE_enrichment_plot_ns, E19.5_DE_enrichment_plot_ns)

ml <- marrangeGrob(pl, nrow=2, ncol=2, top=textGrob("", gp=gpar(fontsize=12)), bottom="")
ml
Figure supplement 4. Numbers of DE genes passing P < 0.05 split by modules.

Figure supplement 4. Numbers of DE genes passing P < 0.05 split by modules.

# E19.5 represents P0

knitr::kable(hyp_geo_p_val_df)
DPC module_color Direction hypergeometric_test_p_value Significant
E12.5 blue Up 0.0000000 TRUE
E12.5 blue Down 1.0000000 FALSE
E12.5 turquoise Up 1.0000000 FALSE
E12.5 turquoise Down 0.0000000 TRUE
E12.5 BrRePi Up 0.9999999 FALSE
E12.5 BrRePi Down 1.0000000 FALSE
E12.5 YeMaBl Up 0.0000000 TRUE
E12.5 YeMaBl Down 1.0000000 FALSE
E12.5 green Up 0.8462219 FALSE
E12.5 green Down 0.9989543 FALSE
E12.5 grey Up 0.9905329 FALSE
E12.5 grey Down 0.9999998 FALSE
E14.5 blue Up 1.0000000 FALSE
E14.5 blue Down 0.0000000 TRUE
E14.5 turquoise Up 0.0000000 TRUE
E14.5 turquoise Down 1.0000000 FALSE
E14.5 BrRePi Up 1.0000000 FALSE
E14.5 BrRePi Down 0.0000000 TRUE
E14.5 YeMaBl Up 0.0000000 TRUE
E14.5 YeMaBl Down 1.0000000 FALSE
E14.5 green Up 0.0000000 TRUE
E14.5 green Down 1.0000000 FALSE
E14.5 grey Up 1.0000000 FALSE
E14.5 grey Down 1.0000000 FALSE
E17.5 blue Up 1.0000000 FALSE
E17.5 blue Down 0.0000000 TRUE
E17.5 turquoise Up 0.0000000 TRUE
E17.5 turquoise Down 1.0000000 FALSE
E17.5 BrRePi Up 1.0000000 FALSE
E17.5 BrRePi Down 1.0000000 FALSE
E17.5 YeMaBl Up 1.0000000 FALSE
E17.5 YeMaBl Down 0.7199469 FALSE
E17.5 green Up 1.0000000 FALSE
E17.5 green Down 1.0000000 FALSE
E17.5 grey Up 1.0000000 FALSE
E17.5 grey Down 1.0000000 FALSE
E19.5 blue Up 1.0000000 FALSE
E19.5 blue Down 0.9999975 FALSE
E19.5 turquoise Up 1.0000000 FALSE
E19.5 turquoise Down 0.0000000 TRUE
E19.5 BrRePi Up 1.0000000 FALSE
E19.5 BrRePi Down 0.9980255 FALSE
E19.5 YeMaBl Up 0.0000000 TRUE
E19.5 YeMaBl Down 1.0000000 FALSE
E19.5 green Up 0.0749699 FALSE
E19.5 green Down 1.0000000 FALSE
E19.5 grey Up 0.9777861 FALSE
E19.5 grey Down 0.5727552 FALSE
####

2 Gene Ontology Enrichment

2.1 GO BP enrichment among DE genes

# Function calculating GO enrichment analysis and pulling out significant genes in each GO category 
# This script uses FDR < 0.05 gene DE threshold. P0 time point is omitted because it doesn't have any FDR < 0.05 genes.

# q is the DE analysis dataset, 
# b is "upregulated" or "downregulated",
# c represent the number of non trivial nodes- maximizes the number of GO category output. Value determined empirically from the error algorithm prompt.

library(topGO)

GO_analysis <- function(q, b, c) {

  background.genes <- q$gene_name
  geneUniverse <- background.genes
  
  if(b=="upregulated"){
  test.genes <- filter(q, FDR < 0.05, logFC > 0)$gene_name
  } else if (b=="downregulated"){
    test.genes <- filter(q, FDR < 0.05, logFC < 0)$gene_name
  } else {
    print("Incorect fold change parameter")
    stop()
  }

  genesOfInterest <- test.genes
  geneList <- factor(as.integer(geneUniverse %in% genesOfInterest))
  names(geneList) <- geneUniverse
  myGOdata <- new("topGOdata", description="My project", ontology="BP", allGenes=geneList,  annot=annFUN.org,    mapping="org.Mm.eg.db", ID = "alias", nodeSize=20)
  print(myGOdata)
  
resultFisher <- runTest(myGOdata, algorithm = "weight01", statistic = "fisher")
#resultKS <- runTest(myGOdata, algorithm = "classic", statistic = "ks")
#resultKS.elim <- runTest(myGOdata, algorithm = "elim", statistic = "ks")
#classicKS = resultKS, elimKS = resultKS.elim, - add later

allRes <- GenTable(myGOdata, classicFisher = resultFisher, orderBy = "classicFisher", topNodes = c)

#showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all')
#nodes_plot <- recordPlot(showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all'))

#Building a df of DE genes belonging to top 20 GO BP caegories
DE_genes_in_top_GO_cat <- function(r){
 fisher.go <- allRes[r,1]
 #print(allRes[x,c(1,2)])
 fisher.ann.genes <- genesInTerm(myGOdata, whichGO=fisher.go)
 df <- data.frame(GO.ID = allRes[r,c(1)], Term = allRes[r,c(2)], gene_name=intersect(as.character(fisher.ann.genes[[1]]), q$gene_name))
 df <- filter(df, gene_name %in% test.genes)
 df
}

list(allRes, lapply(1:20, DE_genes_in_top_GO_cat))
}


# Runs GO analysis - this will take a few minutes. It's read from a file to speak up the report generation.
if(file.exists("GO_df_all.RData")){
  
  load("GO_df_all.RData") } else {

GO_E12.5_up <- GO_analysis(E12.5_GLM, "upregulated", 4440)
GO_E12.5_down <- GO_analysis(E12.5_GLM, "downregulated", 4440)

GO_E14.5_up <- GO_analysis(E14.5_GLM, "upregulated", 4462)
GO_E14.5_down <- GO_analysis(E14.5_GLM, "downregulated", 4462)

GO_E17.5_up <- GO_analysis(E17.5_GLM, "upregulated", 4431)
GO_E17.5_down <- GO_analysis(E17.5_GLM, "downregulated", 4431)

# Constructs a data frame for all 3 time points
GO_E12.5_up_df <- data.frame(GO_E12.5_up[1], DPC=rep("E12.5", nrow(as.data.frame(GO_E12.5_up[1]))),
                             DE_direction=rep("Up", nrow(as.data.frame(GO_E12.5_up[1]))))

GO_E12.5_down_df <- data.frame(GO_E12.5_down[1], DPC=rep("E12.5", nrow(as.data.frame(GO_E12.5_up[1]))),
                             DE_direction=rep("Down", nrow(as.data.frame(GO_E12.5_down[1]))))

GO_E14.5_up_df <- data.frame(GO_E14.5_up[1], DPC=rep("E14.5", nrow(as.data.frame(GO_E14.5_up[1]))),
                             DE_direction=rep("Up", nrow(as.data.frame(GO_E14.5_up[1]))))

GO_E14.5_down_df <- data.frame(GO_E14.5_down[1], DPC=rep("E14.5", nrow(as.data.frame(GO_E14.5_up[1]))),
                             DE_direction=rep("Down", nrow(as.data.frame(GO_E14.5_down[1]))))


GO_E17.5_up_df <- data.frame(GO_E17.5_up[1], DPC=rep("E17.5", nrow(as.data.frame(GO_E17.5_up[1]))),
                             DE_direction=rep("Up", nrow(as.data.frame(GO_E17.5_up[1]))))

GO_E17.5_down_df <- data.frame(GO_E17.5_down[1], DPC=rep("E17.5", nrow(as.data.frame(GO_E17.5_up[1]))),
                             DE_direction=rep("Down", nrow(as.data.frame(GO_E17.5_down[1]))))

#
GO_df_all <- rbind(GO_E12.5_up_df, GO_E12.5_down_df,
      GO_E14.5_up_df, GO_E14.5_down_df,
      GO_E17.5_up_df, GO_E17.5_down_df)

GO_df_all$classicFisher <- as.numeric(GO_df_all$classicFisher)

}


#Calculate enrichment
padding = 5
GO_df_all$Enrichment <- log2((GO_df_all$Significant + padding)/(GO_df_all$Expected + padding))

GO_df_all$GO.ID.Term <- paste(GO_df_all$GO.ID, GO_df_all$Term, sep=".")

#Filters GO terms that meet enrichment criteria
short.terms <- unique(filter(GO_df_all, classicFisher < 0.05, Enrichment > 0)$GO.ID.Term)

GO_df_all_filtered <- filter(GO_df_all, GO.ID.Term %in% short.terms)
GO_df_all_filtered$GO.ID.Term <- factor(GO_df_all_filtered$GO.ID.Term, levels = short.terms)
  

E12.5_GOs_up <- dplyr::select(filter(GO_df_all_filtered, DPC=="E12.5", DE_direction=="Up"),GO.ID.Term, Enrichment)
E14.5_GOs_up <- dplyr::select(filter(GO_df_all_filtered, DPC=="E14.5", DE_direction=="Up"),GO.ID.Term, Enrichment)
E17.5_GOs_up <- dplyr::select(filter(GO_df_all_filtered, DPC=="E17.5", DE_direction=="Up"),GO.ID.Term, Enrichment)

E12.5_GOs_down <- dplyr::select(filter(GO_df_all_filtered, DPC=="E12.5", DE_direction=="Down"),GO.ID.Term, Enrichment)
E14.5_GOs_down <- dplyr::select(filter(GO_df_all_filtered, DPC=="E14.5", DE_direction=="Down"),GO.ID.Term, Enrichment)
E17.5_GOs_down <- dplyr::select(filter(GO_df_all_filtered, DPC=="E17.5", DE_direction=="Down"),GO.ID.Term, Enrichment)


colnames(E12.5_GOs_up) <- c("GO.ID.Term", "E12.5_up")
colnames(E14.5_GOs_up) <- c("GO.ID.Term", "E14.5_up")
colnames(E17.5_GOs_up) <- c("GO.ID.Term", "E17.5_up")


colnames(E12.5_GOs_down) <- c("GO.ID.Term", "E12.5_down")
colnames(E14.5_GOs_down) <- c("GO.ID.Term", "E14.5_down")
colnames(E17.5_GOs_down) <- c("GO.ID.Term", "E17.5_down")


# Merges upregulated and downregulated enrichment values
a <- merge(E12.5_GOs_up, E14.5_GOs_up, by="GO.ID.Term", all = T)
Upregulated_genes_GO <- merge(a, E17.5_GOs_up, by="GO.ID.Term", all = T)

a <- merge(E12.5_GOs_down, E14.5_GOs_down, by="GO.ID.Term", all = T)
Downregulated_genes_GO <- merge(a, E17.5_GOs_down, by="GO.ID.Term", all = T)



GO_df_assembled <- merge(Downregulated_genes_GO, Upregulated_genes_GO,  by="GO.ID.Term") 
GO_df_assembled <- arrange(GO_df_assembled, GO.ID.Term)

rownames(GO_df_assembled) <- GO_df_assembled$GO.ID.Term
GO_df_assembled$GO.ID.Term <- NULL

x <- as.matrix(GO_df_assembled)

x[is.na(x)] <- 0

# A heatmap with 1457 categories = not very informative

#paletteLength <- 100

#myBreaks <- c(seq(min(x), 0, length.out=ceiling(paletteLength/2) + 1), 
#              seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))
#myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "white","#43abea", "#13008e"))(paletteLength)

#pheatmap(x, clustering_distance_rows="correlation", cluster_rows = T, cluster_cols = F, legend = T, fontsize_row  = 9, color = rev(myColor), breaks = myBreaks)
# NOTE: Heatmap in Fig. 2d with GO BP categories curated by Karol and Alex. Categories were selected to be representative of WGCNA modules. Heatmaps presented further in this document demonstrate the concordance of this selection with WGCNA GO analysis.

short.terms <- c(
  
#MIA induced module (Green + Grey)
  "GO:0071456.cellular response to hypoxia",
  "GO:0006096.glycolytic process",
  "GO:0009615.response to virus",                            
  "GO:0006954.inflammatory response",
  "GO:0045766.positive regulation of angiogenesis",
  
 #Turquoise
  "GO:0007215.glutamate receptor signaling pathway",
  "GO:0042391.regulation of membrane potential",
 
  #Blue 
  "GO:0022616.DNA strand elongation",
  "GO:0043044.ATP-dependent chromatin remodeling",
  "GO:0007049.cell cycle",
  
 #YeMaBl
  "GO:0007064.mitotic sister chromatid cohesion",
  "GO:0008380.RNA splicing",
 
  #BrRePi
  "GO:0033108.mitochondrial respiratory chain complex ...",
  "GO:0006412.translation"
  )

short.terms <- unique(short.terms)

GO_df_all_filtered <- filter(GO_df_all, GO.ID.Term %in% short.terms)

#length(unique(GO_df_all_filtered$GO.ID.Term))

GO_df_all_filtered$GO.ID.Term <- factor(GO_df_all_filtered$GO.ID.Term, levels = short.terms)
  
E12.5_GOs_up <- dplyr::select(filter(GO_df_all_filtered, DPC=="E12.5", DE_direction=="Up"),GO.ID.Term, Enrichment)
E14.5_GOs_up <- dplyr::select(filter(GO_df_all_filtered, DPC=="E14.5", DE_direction=="Up"),GO.ID.Term, Enrichment)
E17.5_GOs_up <- dplyr::select(filter(GO_df_all_filtered, DPC=="E17.5", DE_direction=="Up"),GO.ID.Term, Enrichment)

E12.5_GOs_down <- dplyr::select(filter(GO_df_all_filtered, DPC=="E12.5", DE_direction=="Down"),GO.ID.Term, Enrichment)
E14.5_GOs_down <- dplyr::select(filter(GO_df_all_filtered, DPC=="E14.5", DE_direction=="Down"),GO.ID.Term, Enrichment)
E17.5_GOs_down <- dplyr::select(filter(GO_df_all_filtered, DPC=="E17.5", DE_direction=="Down"),GO.ID.Term, Enrichment)


colnames(E12.5_GOs_up) <- c("GO.ID.Term", "E12.5_up")
colnames(E14.5_GOs_up) <- c("GO.ID.Term", "E14.5_up")
colnames(E17.5_GOs_up) <- c("GO.ID.Term", "E17.5_up")


colnames(E12.5_GOs_down) <- c("GO.ID.Term", "E12.5_down")
colnames(E14.5_GOs_down) <- c("GO.ID.Term", "E14.5_down")
colnames(E17.5_GOs_down) <- c("GO.ID.Term", "E17.5_down")


a <- merge(E12.5_GOs_up, E14.5_GOs_up, by="GO.ID.Term")
Upregulated_genes_GO <- merge(a, E17.5_GOs_up, by="GO.ID.Term")


a <- merge(E12.5_GOs_down, E14.5_GOs_down, by="GO.ID.Term")
Downregulated_genes_GO <- merge(a, E17.5_GOs_down, by="GO.ID.Term")


GO_df_assembled <- merge(Downregulated_genes_GO, Upregulated_genes_GO,  by="GO.ID.Term") 
GO_df_assembled <- arrange(GO_df_assembled, GO.ID.Term)

rownames(GO_df_assembled) <- GO_df_assembled$GO.ID.Term
GO_df_assembled$GO.ID.Term <- NULL

curated_rownames <- c(
   
#MIA induced module (Green + Grey)
  "Cellular response to hypoxia",
  "Glycolytic process",
  #"Negative regulation of cell proliferation",
  #"Defense response to virus",
  "Response to virus",
  "Inflammatory response",
  "Positive regulation of angiogenesis",

 #Turquoise
  "Glutamate receptor signaling pathway",
  "Regulation of membrane potential",
  
  #Blue
  "DNA strand elongation",
  "ATP-dependent chromatin remodeling",
  "Cell cycle",
  
 #YeMaBl
  "Mitotic sister chromatid cohesion",
  "RNA splicing",
 
  #BrRePi
  "Mitochondrial respiratory chain complex assembly",
  "Translation"
)

#Sanity check = PASSED
# data.frame(rownames(GO_df_assembled), curated_rownames)

rownames(GO_df_assembled) <- curated_rownames 

x <- na.omit(as.matrix(GO_df_assembled))

x[is.infinite(x)] <- 0

#Capping the color scale to 1.5
x <- ifelse(x < -1.5, -1.5, x)  
x <- ifelse(x > 1.5, 1.5, x)

paletteLength <- 100

myBreaks <- c(seq(min(x), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "white","#43abea", "#13008e"))(paletteLength)

pheatmap(x, clustering_distance_rows="correlation", cluster_rows = F, cluster_cols = F, legend = T, fontsize_row  = 9, color = rev(myColor), breaks = myBreaks)
Fig.2d Heatmap of enrichment of DE genes for representative gene ontology biological processes (GO BP) by developmental time-point shows stage- and module-specific transcriptional pathology. Y-axis rows show enrichment for GO BP terms among module-specific DE genes (FDR < 0.05). Heatmap color scale represents relative fold enrichment for the GO terms among DE genes. P0 not shown due to insufficient DE gene numbers and absence of GO BP terms passing enrichment criteria.

Fig.2d Heatmap of enrichment of DE genes for representative gene ontology biological processes (GO BP) by developmental time-point shows stage- and module-specific transcriptional pathology. Y-axis rows show enrichment for GO BP terms among module-specific DE genes (FDR < 0.05). Heatmap color scale represents relative fold enrichment for the GO terms among DE genes. P0 not shown due to insufficient DE gene numbers and absence of GO BP terms passing enrichment criteria.

2.2 GO BP in modules

GO_analysis_modules <- function(test.genes, c) {
#test.genes <- blue_genes
  
library(topGO)
#Defines a vector containing all gene names
  geneUniverse <- unique(c(as.character(E12.5_GLM$gene_name), as.character(E14.5_GLM$gene_name), as.character(E17.5_GLM$gene_name), as.character(E19.5_GLM$gene_name)))
  
  genesOfInterest <- test.genes
  geneList <- factor(as.integer(geneUniverse %in% genesOfInterest))
  names(geneList) <- geneUniverse
  myGOdata <- new("topGOdata", description="My project", ontology="BP", allGenes=geneList,  annot=annFUN.org,    mapping="org.Mm.eg.db", ID = "alias", nodeSize=20)
  print(myGOdata)
  
resultFisher <- runTest(myGOdata, algorithm = "weight01", statistic = "fisher")

allRes <- GenTable(myGOdata, classicFisher = resultFisher, orderBy = "classicFisher", topNodes = c)

#showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all')
#nodes_plot <- recordPlot(showSigOfNodes(myGOdata, score(resultKS.elim), firstSigNodes = 5, useInfo = 'all'))

#Building a df of DE genes belonging to top 20 GO BP caegories
DE_genes_in_top_GO_cat <- function(r){
 fisher.go <- allRes[r,1]
 #print(allRes[x,c(1,2)])
 fisher.ann.genes <- genesInTerm(myGOdata, whichGO=fisher.go)
 df <- data.frame(GO.ID = allRes[r,c(1)], Term = allRes[r,c(2)], gene_name=intersect(as.character(fisher.ann.genes[[1]]), test.genes))
 df <- filter(df, gene_name %in% test.genes)
 df
}

list(allRes, lapply(1:20, DE_genes_in_top_GO_cat))
}
# Runs GO BP enrichment for each time point and modules

E12.5_GLM <- E12.5_GLM_module
E14.5_GLM <- E14.5_GLM_module
E17.5_GLM <- E17.5_GLM_module
E19.5_GLM <- E19.5_GLM_module

#Function calculating GO enrichment in each module for DE genes
#It removes modules with no overlapping genes.

GO_df_all_module_gene_subset <- function(intersection_genes){

#This is checking if any intersection with gene module producesses no genes. 
module_color_genes <- c("blue", "turquoise", "BrRePi", "YeMaBl", "green", "grey")

intersect_gene_counts <- rbindlist(lapply(module_color_genes, function(x) FUN= {
         data.frame("n_of_intersected_genes"=length(intersect(get(paste0(x, "_genes")), intersection_genes)), "n_of_genes_in_a_module" = length(get(paste0(x, "_genes"))), "module_color"=x)
         }))

#Excludes modules with less than 5 overlapping genes
test_modules <- as.character(filter(intersect_gene_counts, n_of_intersected_genes > 5)$module_color)

# Skip pararellization if your system has limited memory. This commented out parLapply code needs some packages exported to be functional... 
#ll <- list(GO_analysis_modules)
#clust <- makeCluster(3)
#clusterExport(clust, varlist=c("GO_analysis_modules", "E12.5_GLM", "E14.5_GLM", "E17.5_GLM", #"E19.5_GLM", "blue_genes", "turquoise_genes", "BrRePi_genes", "YeMaBl_genes", "green_genes", #"grey_genes"), envir=environment())


module_GO_list <- lapply(test_modules, function(x) FUN= {
  l_test <- GO_analysis_modules(intersect(get(paste0(x, "_genes")), intersection_genes), 4255)
  data.frame(l_test[1], module_color=rep(x, nrow(as.data.frame(l_test[1]))))
  }
  )

#stopCluster(clust)

GO_df_all <- as.data.frame(rbindlist(module_GO_list))

GO_df_all$classicFisher <- as.numeric(GO_df_all$classicFisher)

padding = 5

GO_df_all$Enrichment <- log2((GO_df_all$Significant + padding)/(GO_df_all$Expected + padding))

GO_df_all$GO.ID.Term <- paste(GO_df_all$GO.ID, GO_df_all$Term, sep=".")
GO_df_all

}


# Lists of DE genes
E12.5_upregulated_genes <- filter(E12.5_GLM, FDR < 0.05, logFC > 0)$gene_name
E12.5_downregulated_genes <- filter(E12.5_GLM, FDR < 0.05, logFC < 0)$gene_name

E14.5_upregulated_genes <- filter(E14.5_GLM, FDR < 0.05, logFC > 0)$gene_name
E14.5_downregulated_genes <- filter(E14.5_GLM, FDR < 0.05, logFC < 0)$gene_name

E17.5_upregulated_genes <- filter(E17.5_GLM, FDR < 0.05, logFC > 0)$gene_name
E17.5_downregulated_genes <- filter(E17.5_GLM, FDR < 0.05, logFC < 0)$gene_name

E19.5_upregulated_genes <- filter(E19.5_GLM, FDR < 0.05, logFC > 0)$gene_name
E19.5_downregulated_genes <- filter(E19.5_GLM, FDR < 0.05, logFC < 0)$gene_name


# Numbers of intersected genes in each module
module_color_genes <- c("blue", "turquoise", "BrRePi", "YeMaBl", "green", "grey")

intersect_gene_counts <- function(intersection_genes){
   rbindlist(lapply(module_color_genes, function(x) FUN= {
         data.frame("n_of_intersected_genes"=length(intersect(get(paste0(x, "_genes")), intersection_genes)), "n_of_genes_in_a_module" = length(get(paste0(x, "_genes"))), "module_color"=x)
         }))
}

E12.5_up_module_ns <- intersect_gene_counts(E12.5_upregulated_genes)
E12.5_down_module_ns <- intersect_gene_counts(E12.5_downregulated_genes)
E14.5_up_module_ns <- intersect_gene_counts(E14.5_upregulated_genes)
E14.5_down_module_ns <- intersect_gene_counts(E14.5_downregulated_genes)
E17.5_up_module_ns <- intersect_gene_counts(E17.5_upregulated_genes)
E17.5_down_module_ns <- intersect_gene_counts(E17.5_downregulated_genes)
E19.5_up_module_ns <- intersect_gene_counts(E19.5_upregulated_genes)
E19.5_down_module_ns <- intersect_gene_counts(E19.5_downregulated_genes)


n_of_intersected_genes_in_modules <- data.frame(E12.5_up_module_ns$module_color, E12.5_up_module_ns$n_of_genes_in_a_module, E12.5_up_module_ns$n_of_intersected_genes, 
           E12.5_down_module_ns$n_of_intersected_genes,
           E14.5_up_module_ns$n_of_intersected_genes,
           E14.5_down_module_ns$n_of_intersected_genes,
           E17.5_up_module_ns$n_of_intersected_genes,
           E17.5_down_module_ns$n_of_intersected_genes,
           E19.5_up_module_ns$n_of_intersected_genes,
           E19.5_down_module_ns$n_of_intersected_genes
           )

colnames(n_of_intersected_genes_in_modules) <- c("module", "n_of_genes_in_a_module", "E12.5_up_insct", "E12.5_down_insct", "E14.5_up_insct", "E14.5_down_insct", "E17.5_up_insct", "E17.5_down_insct", "E19.5_up_insct", "E19.5_down_insct" )

#n_of_intersected_genes_in_modules

# Note: The process of calculating GO BP enrichment analysis takes a lot of time. In order to speed up the process of knitting this report, these objects are loaded from hard drive. Objects recalculated at the time of composing this report well replicate the results published in the manuscript, but minor differences, likely arising from updating R and its packages, result in some small differences in the figure layout. We commit to transparency and reproducibility of research. In our Github repository we provide the newly created R objects containing GO BP enrichment results, and the development versions (*_dev.RData) used for generating figures in our manuscript. If you are reading this, and going through this analysis, you are crazy. Best KC :)

if(all(file.exists("E12.5_up_GO_df_all_module_dev.RData", 
               "E12.5_down_GO_df_all_module_dev.RData",
               "E14.5_up_GO_df_all_module_dev.RData",
               "E14.5_down_GO_df_all_module_dev.RData",
               "E17.5_up_GO_df_all_module_dev.RData",
               "E17.5_down_GO_df_all_module_dev.RData"))){
  
  load("E12.5_up_GO_df_all_module_dev.RData")
  load("E12.5_down_GO_df_all_module_dev.RData")
  load("E14.5_up_GO_df_all_module_dev.RData")
  load("E14.5_down_GO_df_all_module_dev.RData")
  load("E17.5_up_GO_df_all_module_dev.RData")
  load("E17.5_down_GO_df_all_module_dev.RData")
               } else {

E12.5_up_GO_df_all_module <- GO_df_all_module_gene_subset(E12.5_upregulated_genes)
E12.5_down_GO_df_all_module <- GO_df_all_module_gene_subset(E12.5_downregulated_genes)

E14.5_up_GO_df_all_module <- GO_df_all_module_gene_subset(E14.5_upregulated_genes)
E14.5_down_GO_df_all_module <- GO_df_all_module_gene_subset(E14.5_downregulated_genes)

E17.5_up_GO_df_all_module <- GO_df_all_module_gene_subset(E17.5_upregulated_genes)
E17.5_down_GO_df_all_module <- GO_df_all_module_gene_subset(E17.5_downregulated_genes)

               }
# NOTE: Code is the next X chunks is part of the exploratory data analysis, and is provided to demonstrate the less processed and curated results of GO BP analysis


# A development version of R session.
#load("G:/Shared drives/Nord Lab Team Drive/Manuscripts in preparation/MIA_Canales&Estes/Cell reports/Reviews/Figures/Temp_4_WGCNA_GO_FDR.RData")


#####
#Let's identify the top 10 GO categories enriched in each module.
top_Go_categories <- function(DPC_df_all){
GO_df_all <- DPC_df_all

test_modules <- as.character(unique(GO_df_all$module_color))
colors <- test_modules

#lapply is fun, I should be using it more often :)
top_module_GOs <- lapply(test_modules, function(x) FUN= as.data.frame(filter(GO_df_all, module_color==x & Enrichment > 0 & classicFisher < 0.05)$GO.ID.Term[1:10]))

top_module_GOs <- rbindlist(top_module_GOs)

df <- data.frame("top_GO_categories"=top_module_GOs[,1], "module_colors"=rep(test_modules, each=10))

colnames(df) <- c("top_GO_categories", "module_colors")

df
}


#Top Go categories
E12.5_up_GO_df_top_categories <- top_Go_categories(E12.5_up_GO_df_all_module)
E12.5_down_GO_df_top_categories <- top_Go_categories(E12.5_down_GO_df_all_module)

E14.5_up_GO_df_top_categories <- top_Go_categories(E14.5_up_GO_df_all_module)
E14.5_down_GO_df_top_categories <- top_Go_categories(E14.5_down_GO_df_all_module)

E17.5_up_GO_df_top_categories <- top_Go_categories(E17.5_up_GO_df_all_module)
E17.5_down_GO_df_top_categories <- top_Go_categories(E17.5_down_GO_df_all_module)
# DE module heatmap function

DE_module_heatmap <- function(DPC_df_all, top_module_GOs, title){

GOs <- unique(top_module_GOs$top_GO_categories)  
    
GO_df_all <- DPC_df_all  
  
GO_df_all_filtered <- filter(GO_df_all, GO.ID.Term %in% GOs & module_color %in% top_module_GOs$module_colors)


GO_df_all_filtered$GO.ID.Term <- factor(GO_df_all_filtered$GO.ID.Term, levels = GOs)

test_modules <- as.character(unique(GO_df_all_filtered$module_color))

filtered_module_GOs_list <- lapply(test_modules, function(x) FUN= {
  p <- dplyr::select(filter(GO_df_all_filtered, module_color==x),GO.ID.Term, Enrichment)
  colnames(p) <- c("GO.ID.Term", x)
  p
})

merged_top_module_GOs <- Reduce(function(x, y) merge(x, y, by="GO.ID.Term", all=TRUE), filtered_module_GOs_list)

rownames(merged_top_module_GOs) <- merged_top_module_GOs$GO.ID.Term
merged_top_module_GOs$GO.ID.Term <- NULL

x <- na.omit(as.matrix(merged_top_module_GOs))

x[is.infinite(x)] <- 0   # consider applying a max/min values in a matrix to -Inf and + Inf???

myBreaks <- c(seq(min(x), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))

myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "white","#43abea", "#13008e"))(paletteLength)

general_modu_GO_clustered_modules <- pheatmap(x, clustering_distance_rows="correlation", cluster_rows = T, cluster_cols = T, legend = T, fontsize_row  = 9, color = rev(myColor), breaks = myBreaks, main=title)

}

2.2.1 Stage-specific heatmaps

# Removes modules with less than 5 overlapping DE genes.
# I added a manual curation step to remove GO BP categories that don't make sense for brain function
# The manual curation is performed through a negative selection

`%nin%` = Negate(`%in%`)

#E12.5_up
modules_to_use <- filter(n_of_intersected_genes_in_modules, E12.5_up_insct > 5)$module
GO_cats <- filter(E12.5_up_GO_df_top_categories, module_colors %in% modules_to_use)
GO_cats <- GO_cats[c(1, 2, 4, 6, 7, 8, 9, 10:17, 21:24, 26:30),]
E12.5_up_module_pheatmap <- DE_module_heatmap(E12.5_up_GO_df_all_module, GO_cats, "E12.5 upregulated")
Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

#E12.5_up_module_pheatmap


#E12.5_down has clustering issue. Fixed below
#modules_to_use <- filter(n_of_intersected_genes_in_modules, E12.5_down_insct > 5)$module
#GO_cats <- filter(E12.5_down_GO_df_top_categories, module_colors %in% modules_to_use)
#GO_cats <- GO_cats[1:nrow(GO_cats) %nin% c(2),]
#E12.5_down_module_pheatmap <- DE_module_heatmap(E12.5_down_GO_df_all_module, GO_cats, "E12.5 #downregulated")
#E12.5_down_module_pheatmap


# E12.5 down - fixed
DE_module_heatmap_mod <- function(DPC_df_all, top_module_GOs, title){

GOs <- unique(top_module_GOs$top_GO_categories)  
GO_df_all <- DPC_df_all  
GO_df_all_filtered <- filter(GO_df_all, GO.ID.Term %in% GOs & module_color %in% top_module_GOs$module_colors)


GO_df_all_filtered$GO.ID.Term <- factor(GO_df_all_filtered$GO.ID.Term, levels = GOs)

test_modules <- as.character(unique(GO_df_all_filtered$module_color))

filtered_module_GOs_list <- lapply(test_modules, function(x) FUN= {
  p <- dplyr::select(filter(GO_df_all_filtered, module_color==x),GO.ID.Term, Enrichment)
  colnames(p) <- c("GO.ID.Term", x)
  p
})

merged_top_module_GOs <- Reduce(function(x, y) merge(x, y, by="GO.ID.Term", all=TRUE), filtered_module_GOs_list)

merged_top_module_GOs <- arrange(merged_top_module_GOs, desc(grey))

rownames(merged_top_module_GOs) <- merged_top_module_GOs$GO.ID.Term


merged_top_module_GOs$GO.ID.Term <- NULL

x <- as.matrix(merged_top_module_GOs)

x[is.na(x)] <- 0

x[is.infinite(x)] <- 0   

myBreaks <- c(seq(min(x), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))

myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "white","#43abea", "#13008e"))(paletteLength)

general_modu_GO_clustered_modules <- pheatmap(x, clustering_distance_rows="correlation", cluster_rows = F, cluster_cols = F, legend = T, fontsize_row  = 9, color = rev(myColor), breaks = myBreaks, main=title)

}

#E12.5_down
modules_to_use <- filter(n_of_intersected_genes_in_modules, E12.5_down_insct > 5)$module
GO_cats <- filter(E12.5_down_GO_df_top_categories, module_colors %in% modules_to_use)
GO_cats <- GO_cats[1:nrow(GO_cats) %nin% c(2),]
E12.5_down_module_pheatmap <- DE_module_heatmap_mod(E12.5_down_GO_df_all_module, GO_cats, "E12.5 downregulated")
Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

#E12.5_down_module_pheatmap




#E14.5_up 
modules_to_use <- filter(n_of_intersected_genes_in_modules, E14.5_up_insct > 5)$module
GO_cats <- filter(E14.5_up_GO_df_top_categories, module_colors %in% modules_to_use)
GO_cats <- GO_cats[1:nrow(GO_cats) %nin% c(9, 10, 43, 54, 55, 56, 60),]
E14.5_up_module_pheatmap <- DE_module_heatmap(E14.5_up_GO_df_all_module, GO_cats, "E14.5 upregulated")
Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

#E14.5_up_module_pheatmap


#E14.5_down
modules_to_use <- filter(n_of_intersected_genes_in_modules, E14.5_down_insct > 5)$module
GO_cats <- filter(E14.5_down_GO_df_top_categories, module_colors %in% modules_to_use)
GO_cats <- GO_cats[1:nrow(GO_cats) %nin% c(""),] # All cats were manually qualified
E14.5_down_module_pheatmap <- DE_module_heatmap(E14.5_down_GO_df_all_module, GO_cats, "E14.5 downregulated")
Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

#E14.5_down_module_pheatmap


#E17.5_up
modules_to_use <- filter(n_of_intersected_genes_in_modules, E17.5_up_insct > 5)$module
GO_cats <- filter(E17.5_up_GO_df_top_categories, module_colors %in% modules_to_use)
GO_cats <- GO_cats[1:nrow(GO_cats) %nin% c(45, 46, 47),]
E17.5_up_module_pheatmap <- DE_module_heatmap(E17.5_up_GO_df_all_module, GO_cats, "E17.5 upregulated")
Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

#E17.5_up_module_pheatmap

#E17.5_down
modules_to_use <- filter(n_of_intersected_genes_in_modules, E17.5_down_insct > 5)$module
GO_cats <- filter(E17.5_down_GO_df_top_categories, module_colors %in% modules_to_use)
GO_cats <- GO_cats[1:nrow(GO_cats) %nin% c(20, 26, 56, 58, 60),]
E17.5_down_module_pheatmap <- DE_module_heatmap(E17.5_down_GO_df_all_module, GO_cats, "E17.5 downregulated")
Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

Fig.2 Figure supplement 2. Stage-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Colors signify relative log2 fold enrichment.

#E17.5_down_module_pheatmap

# P0 time point was analyzed in a similar fashion, considering DE genes with P < 0.05 
#
E12.5_up_GO_df_all_module$Direction <- rep("Up", nrow(E12.5_up_GO_df_all_module))
E12.5_up_GO_df_all_module$DPC <- rep("E12.5", nrow(E12.5_up_GO_df_all_module))

E12.5_down_GO_df_all_module$Direction <- rep("Down", nrow(E12.5_down_GO_df_all_module))
E12.5_down_GO_df_all_module$DPC <- rep("E12.5", nrow(E12.5_down_GO_df_all_module))


E14.5_up_GO_df_all_module$Direction <- rep("Up", nrow(E14.5_up_GO_df_all_module))
E14.5_up_GO_df_all_module$DPC <- rep("E14.5", nrow(E14.5_up_GO_df_all_module))

E14.5_down_GO_df_all_module$Direction <- rep("Down", nrow(E14.5_down_GO_df_all_module))
E14.5_down_GO_df_all_module$DPC <- rep("E14.5", nrow(E14.5_down_GO_df_all_module))

E17.5_up_GO_df_all_module$Direction <- rep("Up", nrow(E17.5_up_GO_df_all_module))
E17.5_up_GO_df_all_module$DPC <- rep("E17.5", nrow(E17.5_up_GO_df_all_module))

E17.5_down_GO_df_all_module$Direction <- rep("Down", nrow(E17.5_down_GO_df_all_module))
E17.5_down_GO_df_all_module$DPC <- rep("E17.5", nrow(E17.5_down_GO_df_all_module))


GO_df_all_module_all <- rbind(E12.5_up_GO_df_all_module,
                              E12.5_down_GO_df_all_module,
                              E14.5_up_GO_df_all_module,
                              E14.5_down_GO_df_all_module,
                              E17.5_up_GO_df_all_module,
                              E17.5_down_GO_df_all_module)

2.2.2 Module-specific heatmaps

# GO BP heatmaps with non-significant categories grayed out 

# For each module displays the top 40 GO terms. Sig P values (Fisher <  0.05) have displayed values.
# This is very useful for discovering relevant enriched GO terms.

representative_GO_terms_pheatmap <- function(mcol, title){

short.terms <- arrange(filter(GO_df_all_module_all, module_color == mcol, classicFisher < 0.05, Enrichment > 0), classicFisher)$GO.ID.Term[1:40]

a1 <- filter(E12.5_up_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6,8,9)]
b1 <- filter(E12.5_down_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]
c1 <- filter(E14.5_up_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]
d1 <- filter(E14.5_down_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]
e1 <- filter(E17.5_up_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]
f1 <- filter(E17.5_down_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]


## Setting Enrichment to 0 if P > 0.05
a1$Enrichment <- ifelse(a1$classicFisher > 0.05, 0, a1$Enrichment)
b1$Enrichment <- ifelse(b1$classicFisher > 0.05, 0, b1$Enrichment)
c1$Enrichment <- ifelse(c1$classicFisher > 0.05, 0, c1$Enrichment)
d1$Enrichment <- ifelse(d1$classicFisher > 0.05, 0, d1$Enrichment)
e1$Enrichment <- ifelse(e1$classicFisher > 0.05, 0, e1$Enrichment)
f1$Enrichment <- ifelse(f1$classicFisher > 0.05, 0, f1$Enrichment)


### Enrichment matrix
df <- Reduce(function(x, y) merge(x, y, by="GO.ID.Term", all=TRUE), list(b1[,c(2,3)], d1[,c(2,3)], f1[,c(2,3)], a1[,c(2,3)], c1[,c(2,3)], e1[,c(2,3)]))

colnames(df) <- c("GO.ID.Term", "Down_E12.5", "Down_E14.5", "Down_E17.5", "Up_E12.5", "Up_E14.5", "Up_E17.5")

df[is.na(df)] <- 0
rownames(df) <- df$GO.ID.Term
df$GO.ID.Term <- NULL

x <- na.omit(as.matrix(df))

x[is.infinite(x)] <- 0   # consider applying a max/min values in a matrix to -Inf and + Inf???

### P value matrix
df_p <- Reduce(function(x, y) merge(x, y, by="GO.ID.Term", all=TRUE), list(b1[,c(1,3)], d1[,c(1,3)], f1[,c(1,3)],  a1[,c(1,3)], c1[,c(1,3)], e1[,c(1,3)]))

colnames(df_p) <- c("GO.ID.Term", "Down_E12.5", "Down_E14.5", "Down_E17.5", "Up_E12.5", "Up_E14.5", "Up_E17.5")

rownames(df_p) <- df_p$GO.ID.Term
df_p$GO.ID.Term <- NULL

df_p[df_p > 0.05] <- ""
df_p[is.na(df_p)] <- ""


myBreaks <- c(0,seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))

myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "white"))(paletteLength)

pheatmap(x, clustering_distance_rows="correlation", cluster_rows = T, cluster_cols = F, legend = T, fontsize_row  = 9, color = rev(myColor), display_numbers = df_p, main=title)

}


representative_GO_terms_pheatmap("blue", "Blue module")
Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

representative_GO_terms_pheatmap("turquoise", "Turquoise module")
Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

representative_GO_terms_pheatmap("BrRePi", "BrRePi module")
Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

representative_GO_terms_pheatmap("YeMaBl", "YeMaBl module")
Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

representative_GO_terms_pheatmap("green", "Green module")
Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

representative_GO_terms_pheatmap("grey", "Grey module")
Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

Module-specific heatmaps of GO BP enrichment analysis of upregulated and downregulated differentially expressed genes in WGCNA modules. Only cells with enrichment P < 0.05 are colored. P values are displayed in heatmap cells. Colors signify relative log2 fold enrichment.

2.3 Initial response

2.3.1 Green module

# Fig 3b

### Green module alone ###

mcol <- "green"

# Finds top 50 terms enriched in the green module at E12.5 and E14.5 among the upregulated genes. 
short.terms <- arrange(filter(GO_df_all_module_all, module_color == mcol, Direction == "Up", DPC %in% c("E12.5", "E14.5"), classicFisher < 0.05, Enrichment > 0), classicFisher)$GO.ID.Term[1:50]

a1 <- filter(E12.5_up_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6,8,9)]
b1 <- filter(E12.5_down_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]
c1 <- filter(E14.5_up_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]
d1 <- filter(E14.5_down_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]

# Setting Enrichment to 0 if P > 0.05
a1$Enrichment <- ifelse(a1$classicFisher > 0.05, 0, a1$Enrichment)
b1$Enrichment <- ifelse(b1$classicFisher > 0.05, 0, b1$Enrichment)
c1$Enrichment <- ifelse(c1$classicFisher > 0.05, 0, c1$Enrichment)
d1$Enrichment <- ifelse(d1$classicFisher > 0.05, 0, d1$Enrichment)


# Enrichment matrix
df <- Reduce(function(x, y) merge(x, y, by="GO.ID.Term", all=TRUE), list(b1[,c(2,3)], d1[,c(2,3)], a1[,c(2,3)], c1[,c(2,3)]))

colnames(df) <- c("GO.ID.Term", "Down_E12.5", "Down_E14.5", "Up_E12.5", "Up_E14.5")

df[is.na(df)] <- 0
rownames(df) <- df$GO.ID.Term
df$GO.ID.Term <- NULL

df_green <- df

x <- na.omit(as.matrix(df))

x[is.infinite(x)] <- 0

### P value matrix
df_p <- Reduce(function(x, y) merge(x, y, by="GO.ID.Term", all=TRUE), list(b1[,c(1,3)], d1[,c(1,3)], a1[,c(1,3)], c1[,c(1,3)]))

colnames(df_p) <- c("GO.ID.Term", "Down_E12.5", "Down_E14.5", "Up_E12.5", "Up_E14.5")

rownames(df_p) <- df_p$GO.ID.Term

df_p$GO.ID.Term <- NULL

df_p_green <- df_p

myBreaks <- c(0,seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))

myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "white"))(paletteLength)

# It's not printed out to increase the clarity of this report
pheatmap(x, clustering_distance_rows="correlation", cluster_rows = T, cluster_cols = F, legend = T, fontsize_row  = 9, color = rev(myColor), main = "Green")

2.3.2 Grey module

### Grey module alone ###
mcol <- "grey"

short.terms <- arrange(filter(GO_df_all_module_all, module_color == mcol, Direction == "Up", DPC %in% c("E12.5", "E14.5"),  classicFisher < 0.05, Enrichment > 0), classicFisher)$GO.ID.Term[1:50]

a1 <- filter(E12.5_up_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6,8,9)]
b1 <- filter(E12.5_down_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]
c1 <- filter(E14.5_up_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]
d1 <- filter(E14.5_down_GO_df_all_module, module_color == mcol, GO.ID.Term %in% short.terms)[,c(6, 8,9)]


## Setting Enrichment to 0 if P > 0.05
a1$Enrichment <- ifelse(a1$classicFisher > 0.05, 0, a1$Enrichment)
b1$Enrichment <- ifelse(b1$classicFisher > 0.05, 0, b1$Enrichment)
c1$Enrichment <- ifelse(c1$classicFisher > 0.05, 0, c1$Enrichment)
d1$Enrichment <- ifelse(d1$classicFisher > 0.05, 0, d1$Enrichment)


### Enrichment matrix
df <- Reduce(function(x, y) merge(x, y, by="GO.ID.Term", all=TRUE), list(b1[,c(2,3)], d1[,c(2,3)], a1[,c(2,3)], c1[,c(2,3)]))

colnames(df) <- c("GO.ID.Term", "Down_E12.5", "Down_E14.5", "Up_E12.5", "Up_E14.5")

df[is.na(df)] <- 0
rownames(df) <- df$GO.ID.Term
df$GO.ID.Term <- NULL

df_grey <- df

x <- na.omit(as.matrix(df))

x[is.infinite(x)] <- 0

### P value matrix
df_p <- Reduce(function(x, y) merge(x, y, by="GO.ID.Term", all=TRUE), list(b1[,c(1,3)], d1[,c(1,3)], a1[,c(1,3)], c1[,c(1,3)]))

colnames(df_p) <- c("GO.ID.Term", "Down_E12.5", "Down_E14.5", "Up_E12.5", "Up_E14.5")

rownames(df_p) <- df_p$GO.ID.Term

df_p$GO.ID.Term <- NULL

df_p_grey <- df_p


myBreaks <- c(0,seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))

myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "white"))(50)

pheatmap(x, clustering_distance_rows="correlation", cluster_rows = T, cluster_cols = F, legend = T, fontsize_row  = 9, color = rev(myColor), main = "Grey")

2.3.3 Green & Grey mod. init. response

df_green$GO.ID.Term <- rownames(df_green)
rownames(df_green) <- NULL

df_grey$GO.ID.Term <- rownames(df_grey)
rownames(df_grey) <- NULL

df_green_f <- df_green[,c(5,3,4)]
df_grey_f <- df_grey[,c(5,3,4)]

#head(df_green_f)
#dim(df_green_f)

#head(df_grey_f)
#dim(df_grey_f)


colnames(df_green_f) <- c("GO.ID.Term", "Up_E12.5_green", "Up_E14.5_green")
colnames(df_grey_f) <- c("GO.ID.Term", "Up_E12.5_grey", "Up_E14.5_grey")

df_m <- merge(df_green_f, df_grey_f, by = "GO.ID.Term", all=TRUE)

#dim(df_m)

rownames(df_m) <- df_m$GO.ID.Term
df_m$GO.ID.Term <- NULL

df_m <- df_m[,c(1,3,2,4)]

x <- as.matrix(df_m)
x[is.na(x)] <- 0

#Removes rows with 0 enrichment 
x <- x[rowSums(x)!=0, ]

myBreaks <- c(0,seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "grey95"))(paletteLength)

pheatmap(x, clustering_distance_rows="correlation", cluster_rows = T, cluster_cols = F, legend = T, fontsize_row  = 9, color = rev(myColor), main = "Non-curated terms")

# Manual curation

#Curated GO BP terms
curated_list <- c(
"GO:0007263.nitric oxide mediated signal transductio...",  
"GO:0038084.vascular endothelial growth factor signa...",
"GO:0001525.angiogenesis",
"GO:0051607.defense response to virus",
"GO:0032722.positive regulation of chemokine product...",
"GO:0050715.positive regulation of cytokine secretio...",
"GO:0070374.positive regulation of ERK1 and ERK2 cas...",
"GO:0030198.extracellular matrix organization",         
"GO:0007155.cell adhesion",                              
"GO:0006909.phagocytosis",
"GO:0071604.transforming growth factor beta producti..."
  
)

#length(curated_list)

df_m$GO.ID.Term <- rownames(df_m)
df_m1 <- filter(df_m, GO.ID.Term %in% curated_list)

df_m1$GO.ID.Term <- factor(df_m1$GO.ID.Term, levels = curated_list)
df_m1 <- arrange(df_m1, GO.ID.Term)

rownames(df_m1) <- df_m1$GO.ID.Term
df_m1$GO.ID.Term <- NULL
   
x <- as.matrix(df_m1)
x[is.na(x)] <- 0

#Removes rows with 0 enrichment 
x <- x[rowSums(x)!=0, ]

myBreaks <- c(0,seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "grey95"))(paletteLength)

#pheatmap(x, clustering_distance_rows="correlation", cluster_rows = F, cluster_cols = F, legend = T, #fontsize_row  = 9, color = rev(myColor), main = "Green and Grey modules capture initial response")

rownames(x) <- c(
"Nitric oxide mediated signal transduction",  
"VEGF signaling pathway",
"Angiogenesis",
"Defense response to virus",
"Positive regulation of chemokine production",
"Positive regulation of cytokine secretion",
"Positive regulation of ERK1 and ERK2 cascade",
"Extracellular matrix organization",         
"Cell adhesion",                              
"Phagocytosis",
"TGF beta production"
)

myBreaks <- c(0,seq(max(x)/paletteLength, max(x), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("#9E0041", "#FEBE6E", "grey95"))(paletteLength)

pheatmap(x[,2:4], clustering_distance_rows="correlation", cluster_rows = F, cluster_cols = F, legend = T, fontsize_row  = 9, color = rev(myColor), main = "Green & Grey modules capture initial resp.")  
Fig. 3b Gene set enrichment analysis of GO BP terms significantly enriched among DE genes (FDR < 0.05) at E12.5 and E14.5 in Green and Grey modules showing upregulation of angiogenesis and immune pathways. Representative enriched GO BP terms with p < 0.05 colored by enrichment; gray represents enrichment p > 0.05 (Fisher’s exact test). Colors signify relative log2 fold enrichment.

Fig. 3b Gene set enrichment analysis of GO BP terms significantly enriched among DE genes (FDR < 0.05) at E12.5 and E14.5 in Green and Grey modules showing upregulation of angiogenesis and immune pathways. Representative enriched GO BP terms with p < 0.05 colored by enrichment; gray represents enrichment p > 0.05 (Fisher’s exact test). Colors signify relative log2 fold enrichment.

3. R sessionInfo()

library(pander)

pander(sessionInfo())

R version 4.0.2 (2020-06-22)

Platform: x86_64-w64-mingw32/x64 (64-bit)

locale: LC_COLLATE=English_United States.1252, LC_CTYPE=English_United States.1252, LC_MONETARY=English_United States.1252, LC_NUMERIC=C and LC_TIME=English_United States.1252

attached base packages: grid, stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: pander(v.0.6.3), topGO(v.2.40.0), SparseM(v.1.78), GO.db(v.3.11.4), graph(v.1.66.0), WGCNA(v.1.69), fastcluster(v.1.1.25), dynamicTreeCut(v.1.63-1), Hmisc(v.4.4-2), Formula(v.1.2-4), survival(v.3.1-12), lattice(v.0.20-41), plotly(v.4.9.2.2), plyr(v.1.8.6), gridExtra(v.2.3), ggrepel(v.0.8.2), knitr(v.1.30), RColorBrewer(v.1.1-2), DT(v.0.16), data.table(v.1.12.8), dplyr(v.1.0.0), reshape2(v.1.4.4), ggplot2(v.3.3.2), mclust(v.5.4.7), GenomicFeatures(v.1.40.1), AnnotationDbi(v.1.50.3), Biobase(v.2.48.0), GenomicRanges(v.1.40.0), GenomeInfoDb(v.1.24.2), IRanges(v.2.22.2), S4Vectors(v.0.26.1), BiocGenerics(v.0.34.0), edgeR(v.3.30.3), limma(v.3.44.3), pheatmap(v.1.0.12), RUVnormalize(v.1.22.0), sva(v.3.36.0), BiocParallel(v.1.22.0), genefilter(v.1.70.0), mgcv(v.1.8-31) and nlme(v.3.1-148)

loaded via a namespace (and not attached): backports(v.1.1.7), BiocFileCache(v.1.12.1), lazyeval(v.0.2.2), splines(v.4.0.2), digest(v.0.6.25), foreach(v.1.5.1), htmltools(v.0.5.0), magrittr(v.2.0.1), checkmate(v.2.0.0), memoise(v.1.1.0), cluster(v.2.1.0), doParallel(v.1.0.16), Biostrings(v.2.56.0), annotate(v.1.66.0), matrixStats(v.0.57.0), askpass(v.1.1), prettyunits(v.1.1.1), jpeg(v.0.1-8.1), colorspace(v.1.4-1), blob(v.1.2.1), rappdirs(v.0.3.1), xfun(v.0.15), crayon(v.1.3.4), RCurl(v.1.98-1.2), jsonlite(v.1.7.2), impute(v.1.62.0), iterators(v.1.0.13), glue(v.1.4.1), gtable(v.0.3.0), zlibbioc(v.1.34.0), XVector(v.0.28.0), DelayedArray(v.0.14.1), scales(v.1.1.1), DBI(v.1.1.0), Rcpp(v.1.0.6), viridisLite(v.0.3.0), xtable(v.1.8-4), progress(v.1.2.2), htmlTable(v.2.1.0), foreign(v.0.8-80), bit(v.4.0.4), preprocessCore(v.1.50.0), htmlwidgets(v.1.5.3), httr(v.1.4.2), ellipsis(v.0.3.1), pkgconfig(v.2.0.3), XML(v.3.99-0.3), farver(v.2.0.3), nnet(v.7.3-14), dbplyr(v.2.0.0), locfit(v.1.5-9.4), tidyselect(v.1.1.0), labeling(v.0.4.2), rlang(v.0.4.9), munsell(v.0.5.0), tools(v.4.0.2), generics(v.0.1.0), RSQLite(v.2.2.1), evaluate(v.0.14), stringr(v.1.4.0), yaml(v.2.2.1), bit64(v.4.0.5), purrr(v.0.3.4), RUVnormalizeData(v.1.8.0), xml2(v.1.3.2), biomaRt(v.2.44.4), compiler(v.4.0.2), rstudioapi(v.0.13), curl(v.4.3), png(v.0.1-7), tibble(v.3.0.1), stringi(v.1.4.6), highr(v.0.8), Matrix(v.1.2-18), vctrs(v.0.3.0), pillar(v.1.4.7), lifecycle(v.0.2.0), bitops(v.1.0-6), rtracklayer(v.1.48.0), R6(v.2.5.0), latticeExtra(v.0.6-29), codetools(v.0.2-16), assertthat(v.0.2.1), SummarizedExperiment(v.1.18.2), openssl(v.1.4.3), withr(v.2.3.0), GenomicAlignments(v.1.24.0), Rsamtools(v.2.4.0), GenomeInfoDbData(v.1.2.3), hms(v.0.5.3), rpart(v.4.1-15), tidyr(v.1.1.0), rmarkdown(v.2.6) and base64enc(v.0.1-3)

#save.image("Workspace_recalculated_GO_BP_modules_3_15_2021.RData")
#load("Workspace_recalculated_GO_BP_modules_3_15_2021.RData")